library(tidyverse)
library(readxl)
library(broom)
library(gt)
In this R Markdown file, the Excel file that is read in is called analytic_data.xlsx, and the data frame is called EXAMPLE_DATA. Replace these with the names of the file and data frame you wish to use.
# Read the analytic data set from the Excel workbook.
EXAMPLE_DATA <- read_excel("analytic_data.xlsx")

# Convert every character column to a factor.
# across() + where() is the current dplyr idiom; mutate_if() is superseded.
EXAMPLE_DATA <- EXAMPLE_DATA %>%
  mutate(across(where(is.character), as.factor))
In all of the code below, you will need to replace EXAMPLE_DATA with the name of your data frame. You will need to use the appropriate variable names.
Without assuming equal variances
# Two-sample t-test comparing NUMERICAL_VARIABLE1 between the groups of
# CATEGORICAL_VARIABLE1. var.equal is left at its default, so equal variances
# are not assumed (Welch test).
t.test(
  NUMERICAL_VARIABLE1 ~ CATEGORICAL_VARIABLE1,
  data = EXAMPLE_DATA,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: NUMERICAL_VARIABLE1 by CATEGORICAL_VARIABLE1
## t = -0.7219, df = 12.804, p-value = 0.4833
## alternative hypothesis: true difference in means between group A and group B is not equal to 0
## 95 percent confidence interval:
## -2.957998 1.477998
## sample estimates:
## mean in group A mean in group B
## 3.97 4.71
Assuming equal variances
# Two-sample t-test comparing NUMERICAL_VARIABLE1 between the groups of
# CATEGORICAL_VARIABLE1, this time with var.equal = TRUE so the two group
# variances are treated as equal.
t.test(
  NUMERICAL_VARIABLE1 ~ CATEGORICAL_VARIABLE1,
  data = EXAMPLE_DATA,
  var.equal = TRUE,
  conf.level = 0.95
)
##
## Two Sample t-test
##
## data: NUMERICAL_VARIABLE1 by CATEGORICAL_VARIABLE1
## t = -0.7219, df = 18, p-value = 0.4796
## alternative hypothesis: true difference in means between group A and group B is not equal to 0
## 95 percent confidence interval:
## -2.893602 1.413602
## sample estimates:
## mean in group A mean in group B
## 3.97 4.71
A way to present the output
# Run the t-test once and keep the result so it can be turned into a table.
independentsamples <- t.test(
  NUMERICAL_VARIABLE1 ~ CATEGORICAL_VARIABLE1,
  data = EXAMPLE_DATA,
  conf.level = 0.95
)

# Convert the test result to a data frame, keep the estimate, confidence
# limits and p-value, then format them as a gt table. The two confidence
# limits are merged into a single "low to high" column.
independentsamples %>%
  tidy() %>%
  select(estimate, conf.low, conf.high, p.value) %>%
  gt() %>%
  fmt_number(columns = c(estimate, conf.low, conf.high), decimals = 2) %>%
  fmt_number(columns = p.value, decimals = 3) %>%
  cols_merge_range(col_begin = conf.low, col_end = conf.high, sep = " to ") %>%
  cols_align(align = "center", columns = everything()) %>%
  cols_label(
    estimate = "Estimate of the difference of means",
    conf.low = "95% CI ",
    p.value = "P-value"
  )
| Estimate of the difference of means | 95% CI | P-value |
|---|---|---|
| −0.74 | −2.96 to 1.48 | 0.483 |
# Wilcoxon rank sum test comparing the two numerical variables as unpaired
# samples, with a confidence interval for the location shift.
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
wilcox.test(
  x = EXAMPLE_DATA$NUMERICAL_VARIABLE1,
  y = EXAMPLE_DATA$NUMERICAL_VARIABLE2,
  conf.int = TRUE
)
## Warning in wilcox.test.default(x = EXAMPLE_DATA$NUMERICAL_VARIABLE1, y =
## EXAMPLE_DATA$NUMERICAL_VARIABLE2, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = EXAMPLE_DATA$NUMERICAL_VARIABLE1, y =
## EXAMPLE_DATA$NUMERICAL_VARIABLE2, : cannot compute exact confidence intervals
## with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: EXAMPLE_DATA$NUMERICAL_VARIABLE1 and EXAMPLE_DATA$NUMERICAL_VARIABLE2
## W = 186, p-value = 0.7148
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -1.200059 1.099938
## sample estimates:
## difference in location
## -0.199919
# Paired t-test: tests whether the mean of the within-row differences between
# NUMERICAL_VARIABLE1 and NUMERICAL_VARIABLE2 is zero.
t.test(
  x = EXAMPLE_DATA$NUMERICAL_VARIABLE1,
  y = EXAMPLE_DATA$NUMERICAL_VARIABLE2,
  paired = TRUE
)
##
## Paired t-test
##
## data: EXAMPLE_DATA$NUMERICAL_VARIABLE1 and EXAMPLE_DATA$NUMERICAL_VARIABLE2
## t = 0.40192, df = 19, p-value = 0.6922
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.82048 1.21048
## sample estimates:
## mean difference
## 0.195
# Wilcoxon signed rank test for the paired numerical variables, with a
# confidence interval for the (pseudo)median of the differences.
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
wilcox.test(
  x = EXAMPLE_DATA$NUMERICAL_VARIABLE1,
  y = EXAMPLE_DATA$NUMERICAL_VARIABLE2,
  paired = TRUE,
  conf.int = TRUE
)
## Warning in wilcox.test.default(x = EXAMPLE_DATA$NUMERICAL_VARIABLE1, y =
## EXAMPLE_DATA$NUMERICAL_VARIABLE2, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = EXAMPLE_DATA$NUMERICAL_VARIABLE1, y =
## EXAMPLE_DATA$NUMERICAL_VARIABLE2, : cannot compute exact confidence interval
## with ties
##
## Wilcoxon signed rank test with continuity correction
##
## data: EXAMPLE_DATA$NUMERICAL_VARIABLE1 and EXAMPLE_DATA$NUMERICAL_VARIABLE2
## V = 111.5, p-value = 0.8227
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.9500783 1.2500660
## sample estimates:
## (pseudo)median
## 0.1500488
# Test of the correlation between the two numerical variables. With no
# `method` given, the recorded output below is Pearson's product-moment
# correlation.
cor.test(
  ~ NUMERICAL_VARIABLE1 + NUMERICAL_VARIABLE2,
  data = EXAMPLE_DATA
)
##
## Pearson's product-moment correlation
##
## data: NUMERICAL_VARIABLE1 and NUMERICAL_VARIABLE2
## t = 1.7861, df = 18, p-value = 0.09094
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.06581448 0.70882202
## sample estimates:
## cor
## 0.3880067
# Fisher's exact test of association between the two categorical variables.
fisher.test(
  x = EXAMPLE_DATA$CATEGORICAL_VARIABLE1,
  y = EXAMPLE_DATA$CATEGORICAL_VARIABLE2
)
##
## Fisher's Exact Test for Count Data
##
## data: EXAMPLE_DATA$CATEGORICAL_VARIABLE1 and EXAMPLE_DATA$CATEGORICAL_VARIABLE2
## p-value = 0.06978
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.007870555 1.133635839
## sample estimates:
## odds ratio
## 0.1226533
# Pearson's chi-squared test of association between the two categorical
# variables. The recorded output below shows Yates' continuity correction
# being applied, and a warning that the chi-squared approximation may be
# incorrect for this data.
chisq.test(EXAMPLE_DATA$CATEGORICAL_VARIABLE1, EXAMPLE_DATA$CATEGORICAL_VARIABLE2)
## Warning in chisq.test(EXAMPLE_DATA$CATEGORICAL_VARIABLE1,
## EXAMPLE_DATA$CATEGORICAL_VARIABLE2): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: EXAMPLE_DATA$CATEGORICAL_VARIABLE1 and EXAMPLE_DATA$CATEGORICAL_VARIABLE2
## X-squared = 3.2323, df = 1, p-value = 0.0722
© Statistical Consulting Centre, University of Melbourne, 2023